
// Description: This do-file builds a single master dataset of North Carolina inpatient data for 2008-2017. The master dataset contains insurance type, indicator variables for selected diagnoses and for different types of delivery procedures, and basic information such as the year.

//=============================Implement for 2008-2010==========================

// Looping through inpatient (North Carolina) data to make smaller analytic data set

clear

cd "~/Documents/Research/data/UNC Sheps/Stata"

// Build one reduced extract per year for 2008-2010.
// Each pass loads inpt_<year>, derives payer, race, diagnosis and procedure
// indicators, keeps only the analytic variables and writes them to a
// temporary file that is stacked with the other years after the loop.

forvalues yr = 2008/2010 {

	// Temporary file that holds this year's extract until the years are combined
	tempfile inpt_mini_`yr'

	use inpt_`yr', clear

	// Year is taken from the file being read because the supplied fyear
	// variable is sometimes wrong
	gen year = `yr'

	// ---- Primary payer groups (single-letter payer1 codes) ----

	gen medicaid = payer1=="D"

	gen private  = inlist(payer1, "B", "H", "I", "J", "L", "Y", "Z")

	// inlist() accepts at most nine string values per call, hence two calls
	gen otherIns = inlist(payer1, "M", "A", "C", "F", "G", "K", "O", "P", "Q") | inlist(payer1, "R", "T", "V", "W", "X", "U", "1", "2")

	// ---- Race: white = 1 when race is "4"; set to missing for race "0" or "6" ----

	gen white = race=="4"
	replace white = . if inlist(race, "0", "6")

	// ---- Diagnosis-based indicators (24 diagnosis slots, ICD-9) ----

	gen compHypertension = 0
	gen overweightObese  = 0
	gen diabetes         = 0

	forvalues i = 1/24 {

		// Codes starting with "E" or "V" are not of interest: blank them out
		replace diag`i' = "" if inlist(substr(diag`i',1,1), "E", "V")

		// Insert the decimal point after the third character and convert to numeric
		gen diagdec`i' = substr(diag`i',1,3) + "." + substr(diag`i',4,2)
		destring diagdec`i', gen(diag`i'_r)

		local dx diag`i'_r

		// Hypertension complicating pregnancy, childbirth and the puerperium
		replace compHypertension = 1 if inrange(`dx', 642, 642.94)

		// Overweight and obesity
		replace overweightObese = 1 if inrange(`dx', 278.0, 278.03)

		// Diabetes: Type I/II, diabetes or abnormal glucose complicating
		// pregnancy, abnormal glucose, secondary diabetes
		replace diabetes = 1 if inrange(`dx', 250, 250.93) | inrange(`dx', 648, 648.04) | inrange(`dx', 648.8, 648.84) | inrange(`dx', 790.2, 790.29) | inrange(`dx', 249, 249.91)

		}

	// ---- Procedure-based indicators ----
	// The 2008 file is processed with 10 procedure slots, later years with 24

	local nproc = cond(`yr'==2008, 10, 24)

	// Number of non-empty procedure codes on the record
	egen procedureCount = rownonmiss(proccd1-proccd`nproc'), strok

	//   c_section   = C-section
	//   code72      = forceps or vacuum
	//   drugInduce  = medical induction
	//   otherObProc = other obstetric procedure
	//   vagNoProc / vagProc = vaginal delivery without / with a procedure
	// (allDeliv is assembled later from c_section, vagNoProc and vagProc)

	gen c_section   = 0
	gen code72      = 0
	gen drugInduce  = 0
	gen otherObProc = 0

	gen vagNoProc   = 0
	gen vagProc     = 0

	forvalues j = 1/`nproc' {

		// Insert the decimal point after the second character and convert to numeric
		gen procdec`j' = substr(proccd`j',1,2) + "." + substr(proccd`j',3,2)
		destring procdec`j', gen(proccd`j'_r)

		local p proccd`j'_r

		replace c_section = 1 if inrange(`p', 74, 74.99) & !inlist(`p', 74.3, 74.91)

		replace code72 = 1 if (inrange(`p', 72, 72.9) | inlist(`p', 73.21, 73.91, 73.92)) & !inlist(`p', 72.8, 72.9)

		replace drugInduce = 1 if `p'==73.4

		replace otherObProc = 1 if (inrange(`p', 73, 73.99) | inlist(`p', 72.8, 72.9)) & !inlist(`p', 73.21, 73.91, 73.92, 73.51, 73.59)

		replace vagNoProc = 1 if inrange(`p', 73.51, 73.59)

		replace vagProc = 1 if inrange(`p', 72, 73.99) & !inlist(`p', 73.51, 73.59)

		}

	keep year fac ptcnty white medicaid private otherIns compHypertension overweightObese diabetes c_section code72 drugInduce otherObProc vagNoProc vagProc agey procedureCount dayscov totchg

	save "`inpt_mini_`yr''"

	}
	
// Recombine all the analytic subyears 	

// Start from the 2008 extract and stack the remaining years beneath it
use "`inpt_mini_2008'", clear

foreach yr in 2009 2010 {
	append using "`inpt_mini_`yr''"
	}

// Shrink storage types before writing the combined 2008-2010 file
compress
save "~/Desktop/m_inpt_mini_08to10", replace

//=============================Implement for 2011-2015========================== 

// Looping through inpt (North Carolina) data to make smaller analytic data set
// Years 2011-2015 had to be analyzed separately due to changes in insurance codes and race codes

clear

// Build one reduced extract per year for 2011-2015.
// Same outputs as the earlier years, but payer1 and race use a different
// coding scheme, there are 25 diagnosis slots and 20 procedure slots.
// NOTE(review): every diagnosis/procedure code here is treated as ICD-9 and
// any code that is not purely numeric is blanked. If the 2015 file contains
// ICD-10 codes, those records would receive no flags - confirm.

forvalues yr = 2011/2015 {

	// Temporary file that holds this year's extract until the years are combined
	tempfile inpt_mini_`yr'

	use inpt_`yr' , clear

	// Year of the source file
	gen year = `yr'

	// ---- Primary payer groups (two-character payer1 codes) ----

	gen medicaid = payer1=="MC"

	gen private  = inlist(payer1, "BL", "HM", "CI", "14", "15", "13", "12")

	// inlist() accepts at most nine string values per call, hence two calls
	gen otherIns = inlist(payer1, "MA", "09", "10", "11", "16", "CH", "DS", "LI", "LM") | inlist(payer1, "MB", "OF", "TV", "VA", "WC", "ZZ")

	// ---- Race: white = 1 when race is "5"; set to missing for race "9" ----

	gen white = race=="5"
	replace white = . if race=="9"

	// ---- Diagnosis-based indicators (25 diagnosis slots, ICD-9) ----

	gen compHypertension = 0
	gen overweightObese  = 0
	gen diabetes         = 0

	forvalues i = 1/25 {

		// Blank every code that is not purely numeric (this removes the
		// "E" and "V" codes, which are not of interest)
		replace diag`i' = "" if missing(real(diag`i'))

		// Insert the decimal point after the third character and convert to numeric
		gen diagdec`i' = substr(diag`i',1,3) + "." + substr(diag`i',4,2)
		destring diagdec`i', gen(diag`i'_r)

		local dx diag`i'_r

		// Hypertension complicating pregnancy, childbirth and the puerperium
		replace compHypertension = 1 if inrange(`dx', 642, 642.94)

		// Overweight and obesity
		replace overweightObese = 1 if inrange(`dx', 278.0, 278.03)

		// Diabetes: Type I/II, diabetes or abnormal glucose complicating
		// pregnancy, abnormal glucose, secondary diabetes
		replace diabetes = 1 if inrange(`dx', 250, 250.93) | inrange(`dx', 648, 648.04) | inrange(`dx', 648.8, 648.84) | inrange(`dx', 790.2, 790.29) | inrange(`dx', 249, 249.91)

		}

	// ---- Procedure-based indicators (20 procedure slots) ----

	// Count the non-empty procedure codes BEFORE non-numeric codes are blanked
	// below, so a code containing letters still counts toward the total
	egen procedureCount = rownonmiss(proccd1-proccd20), strok

	//   c_section   = C-section
	//   code72      = forceps or vacuum
	//   drugInduce  = medical induction
	//   otherObProc = other obstetric procedure
	//   vagNoProc / vagProc = vaginal delivery without / with a procedure
	// (allDeliv is assembled later from c_section, vagNoProc and vagProc)

	gen c_section   = 0
	gen code72      = 0
	gen drugInduce  = 0
	gen otherObProc = 0

	gen vagNoProc   = 0
	gen vagProc     = 0

	forvalues j = 1/20 {

		// A procedure code containing letters was found in the data; blank
		// anything non-numeric so the conversion to numeric succeeds
		replace proccd`j' = "" if missing(real(proccd`j'))

		// Insert the decimal point after the second character and convert to numeric
		gen procdec`j' = substr(proccd`j',1,2) + "." + substr(proccd`j',3,2)
		destring procdec`j', gen(proccd`j'_r)

		local p proccd`j'_r

		replace c_section = 1 if inrange(`p', 74, 74.99) & !inlist(`p', 74.3, 74.91)

		replace code72 = 1 if (inrange(`p', 72, 72.9) | inlist(`p', 73.21, 73.91, 73.92)) & !inlist(`p', 72.8, 72.9)

		replace drugInduce = 1 if `p'==73.4

		replace otherObProc = 1 if (inrange(`p', 73, 73.99) | inlist(`p', 72.8, 72.9)) & !inlist(`p', 73.21, 73.91, 73.92, 73.51, 73.59)

		replace vagNoProc = 1 if inrange(`p', 73.51, 73.59)

		replace vagProc = 1 if inrange(`p', 72, 73.99) & !inlist(`p', 73.51, 73.59)

		}

	keep year fac ptcnty white medicaid private otherIns compHypertension overweightObese diabetes c_section code72 drugInduce otherObProc vagNoProc vagProc agey procedureCount dayscov totchg

	save "`inpt_mini_`yr''"

	}
	
* Recombine all the analytic subyears 	

// Start from the 2011 extract and stack 2012-2015 beneath it
use "`inpt_mini_2011'" , clear

forvalues yr = 2012/2015 {
	append using "`inpt_mini_`yr''"
	}

// Shrink storage types before writing the combined 2011-2015 file
compress
save "~/Desktop/m_inpt_mini_11to15", replace

//===========================Implement for 2016-2017 ==========================

// Looping through inpt (North Carolina) data to make smaller analytic data set
// Years 2016-2017 had to be analyzed separately due to implementation of ICD-10

clear

// Build one reduced extract per year for 2016-2017.
// These files mix ICD-9 and ICD-10 codes. dxrefmeth<n> tells the two apart:
// "9" is handled with the ICD-9 rules used for earlier years, "0" with the
// ICD-10 rules below. ICD-10 forceps/vacuum, induction, other-obstetric and
// vaginal-procedure codes are identified by matching each procedure slot
// against lookup files stored in ~/Documents/Research/data/other.
// NOTE(review): the procedure logic also keys on dxrefmeth<n> (the same
// variables used for the diagnosis slots) - confirm that this is the intended
// code-version flag for proccd<n>.

forvalues yr = 2016/2017 {

	// Temporary file that holds this year's extract until the years are combined
	tempfile inpt_mini_`yr'

	use inpt_`yr' , clear

	// Year of the source file
	gen year = `yr'

	// ---- Primary payer groups (two-character payer1 codes) ----

	gen medicaid = payer1=="MC"

	gen private  = inlist(payer1, "BL", "HM", "CI", "14", "15", "13", "12")

	// inlist() accepts at most nine string values per call, hence two calls
	gen otherIns = inlist(payer1, "MA", "09", "10", "11", "16", "CH", "DS", "LI", "LM") | inlist(payer1, "MB", "OF", "TV", "VA", "WC", "ZZ")

	// ---- Race: white = 1 when race is "5"; set to missing for race "9" ----

	gen white = race=="5"
	replace white = . if race=="9"

	// ---- Diagnosis-based indicators (25 diagnosis slots) ----

	gen compHypertension = 0
	gen overweightObese  = 0
	gen diabetes         = 0

	forvalues i = 1/25 {

		// ICD-9: keep the code only when it is flagged ICD-9 and purely
		// numeric (this removes "E" and "V" codes), then insert the decimal
		// point after the third character and convert to numeric
		gen ICD9diag`i' = diag`i' if dxrefmeth`i'=="9"
		replace ICD9diag`i' = "" if real(diag`i')==.

		gen diagdec`i' = substr(ICD9diag`i',1,3) + "." + substr(ICD9diag`i',4,2)
		destring diagdec`i', gen(diag`i'_r)

		// ICD-10: the digits after the leading letter, with a decimal point
		// after the second digit (e.g. "O99810" -> 99.810). Codes that contain
		// further letters become missing.
		// Stored as DOUBLE on purpose: as a float, 99.81 is held slightly below
		// 99.81 and 99.815 slightly above 99.815, so the inrange() test against
		// those bounds would silently miss O99.810 and O99.815.
		gen double ICD10dec`i'_r = real(substr(diag`i',2,2) + "." + substr(diag`i',4,.))

		local dx    diag`i'_r
		local cat10 substr(diag`i',1,3)

		// Hypertension complicating pregnancy, childbirth and the puerperium
		replace compHypertension = 1 if inrange(`dx', 642, 642.94) & dxrefmeth`i'=="9"
		replace compHypertension = 1 if inlist(`cat10', "O10", "O11", "O13", "O14", "O15", "O16") & dxrefmeth`i'=="0"

		// Overweight and obesity
		replace overweightObese = 1 if inrange(`dx', 278.0, 278.03) & dxrefmeth`i'=="9"
		replace overweightObese = 1 if `cat10'=="E66" & dxrefmeth`i'=="0"

		// Diabetes: Type I/II, diabetes or abnormal glucose complicating
		// pregnancy, abnormal glucose, secondary diabetes
		replace diabetes = 1 if (inrange(`dx', 250, 250.93) | inrange(`dx', 648, 648.04) | inrange(`dx', 648.8, 648.84) | inrange(`dx', 790.2, 790.29) | inrange(`dx', 249, 249.91)) & dxrefmeth`i'=="9"
		replace diabetes = 1 if (inlist(`cat10', "E08", "E09", "E10", "E11", "E13", "O24", "R73") | (substr(diag`i',1,1)=="O" & inrange(ICD10dec`i'_r, 99.81, 99.815))) & dxrefmeth`i'=="0"

		}

	// ---- Procedure-based indicators (20 procedure slots) ----

	// Number of non-empty procedure codes on the record
	egen procedureCount = rownonmiss(proccd1-proccd20), strok

	//   c_section   = C-section
	//   code72      = forceps or vacuum
	//   drugInduce  = medical induction
	//   otherObProc = other obstetric procedure
	//   vagNoProc / vagProc = vaginal delivery without / with a procedure
	// (allDeliv is assembled later from c_section, vagNoProc and vagProc)

	gen c_section   = 0
	gen code72      = 0
	gen drugInduce  = 0
	gen otherObProc = 0

	gen vagNoProc   = 0
	gen vagProc     = 0

	forvalues j = 1/20 {

		// ICD-9: keep purely numeric codes flagged "9", insert the decimal
		// point after the second character and convert to numeric
		gen ICD9proccd`j' = proccd`j' if dxrefmeth`j'=="9"
		replace ICD9proccd`j' = "" if real(ICD9proccd`j')==. & dxrefmeth`j'=="9"

		gen procdec`j' = substr(ICD9proccd`j',1,2) + "." + substr(ICD9proccd`j',3,2) if dxrefmeth`j'=="9"
		destring procdec`j', gen(proccd`j'_r)

		// ICD-10: first three characters and (numeric) seventh character
		gen procFstThree`j' = substr(proccd`j',1,3) if dxrefmeth`j'=="0"
		gen procLast`j'     = substr(proccd`j',7,1) if dxrefmeth`j'=="0"
		gen procLast`j'_r   = real(procLast`j')     if dxrefmeth`j'=="0"

		local p proccd`j'_r

		replace c_section = 1 if inrange(`p', 74, 74.99) & !inlist(`p', 74.3, 74.91) & dxrefmeth`j'=="9"
		replace c_section = 1 if procFstThree`j'=="10D" & inrange(procLast`j'_r, 0, 2) & dxrefmeth`j'=="0"

		replace code72 = 1 if (inrange(`p', 72, 72.9) | inlist(`p', 73.21, 73.91, 73.92)) & !inlist(`p', 72.8, 72.9) & dxrefmeth`j'=="9"

		replace drugInduce = 1 if `p'==73.4 & dxrefmeth`j'=="9"

		replace otherObProc = 1 if (inrange(`p', 73, 73.99) | inlist(`p', 72.8, 72.9)) & !inlist(`p', 73.21, 73.91, 73.92, 73.51, 73.59) & dxrefmeth`j'=="9"

		replace vagNoProc = 1 if inrange(`p', 73.51, 73.59) & dxrefmeth`j'=="9"
		replace vagNoProc = 1 if procFstThree`j'=="10E" & dxrefmeth`j'=="0"

		replace vagProc = 1 if inrange(`p', 72, 73.99) & !inlist(`p', 73.51, 73.59) & dxrefmeth`j'=="9"

		}

	// ---- ICD-10 procedure flags taken from lookup files ----
	// For each flag, every procedure slot is temporarily renamed to the key
	// variable of the corresponding lookup file and matched against it; a
	// match on a slot flagged ICD-10 sets the flag.
	// keep(master match) stops lookup-only rows from being added to the
	// patient data, so nothing has to be deleted again afterwards.

	cd "~/Documents/Research/data/other"

	local flagList code72 drugInduce otherObProc vagProc
	local keyList  Code72 drugInduceVar otherObProcVar ICD10VagProc
	local fileList ICD10_ForcepsVacuum ICD10_MedicalInduction ICD10_OtherObProc ICD10_CompleteObProc

	forvalues s = 1/4 {

		local flag : word `s' of `flagList'
		local key  : word `s' of `keyList'
		local file : word `s' of `fileList'

		forvalues j = 1/20 {

			tempvar hit

			rename proccd`j' `key'
			merge m:1 `key' using `file'.dta, keep(master match) gen(`hit')
			rename `key' proccd`j'

			replace `flag' = 1 if `hit'==3 & dxrefmeth`j'=="0"
			drop `hit'

			}

		}

	// Return to the data directory so the next year's file can be loaded
	cd "~/Documents/Research/data/UNC Sheps/Stata"

	keep year fac ptcnty white medicaid private otherIns compHypertension overweightObese diabetes c_section code72 drugInduce otherObProc vagNoProc vagProc agey procedureCount dayscov totchg

	save "`inpt_mini_`yr''"

	}

* Recombine all the analytic subyears 	

// Destination of the combined 2016-2017 extract
local combined "~/Desktop/m_inpt_mini_16to17"

// 2016 first, 2017 stacked beneath it
use "`inpt_mini_2016'" , clear
append using "`inpt_mini_2017'"

// Shrink storage types before writing the combined file
compress
save "`combined'", replace

//================================Final Data Set================================

clear

cd "~/Desktop"

// Stack the three period files in chronological order
use m_inpt_mini_08to10.dta, clear

foreach period in 11to15 16to17 {
	append using "m_inpt_mini_`period'.dta"
	}

// Every record in this file is from North Carolina
gen state = "NC"

// Length of stay
rename dayscov losdays

// Facility identifier: convert from string to a numeric (long) variable.
// real() returns missing for any value that is not a valid number.
// ptcnty is not needed in the master file and is dropped at the same time.
gen long facility_num = real(fac)
drop fac ptcnty
rename facility_num facility_id

// Any delivery: vaginal without procedure, vaginal with procedure, or C-section
gen allDeliv = (vagNoProc==1 | vagProc==1 | c_section==1)

compress
save "~/Desktop/m_inpt_mini_08to17", replace











